package com.kkb.robotcustomerservice.util;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.seg.common.Term;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers for segmenting text with HanLP and filtering the resulting
 * words against a stop-word list loaded from the classpath.
 *
 * <p>The stop-word list is read once, when this class is initialized. If the
 * list cannot be read, the failure is reported on standard error and
 * {@link #stopWords} stays empty, so {@link #removeStop(List)} then filters
 * nothing.
 */
public class WordsUtil {

    // Classpath location of the stop-word list (the HIT stop-word table).
    // This could be moved into a configuration file.
    public static final String stopWordsPath = "data-for-1.7.5\\data\\dictionary\\hit_stopwords.txt";

    // One entry per line of the file at stopWordsPath, in file order.
    public static ArrayList<String> stopWords = new ArrayList<>();

    static {
        Resource resource = new ClassPathResource(stopWordsPath);
        // Read through the resource's stream rather than Resource.getFile():
        // getFile() only works when the resource is a plain file on disk and
        // fails once the application is packaged into a JAR.
        // NOTE(review): assumes the list file is UTF-8 encoded -- confirm.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
            String stopWord;
            // Read the stop words one line at a time.
            while ((stopWord = br.readLine()) != null) {
                stopWords.add(stopWord);
            }
        } catch (IOException e) {
            // Best effort: a missing or unreadable list must not prevent the
            // class from loading, but say which resource failed.
            System.err.println("Failed to load stop words from classpath resource: " + stopWordsPath);
            e.printStackTrace();
        }
    }

    /**
     * Segments the given text with {@code HanLP.segment} and then passes the
     * resulting term list through {@code CoreStopWordDictionary.apply}.
     *
     * @param words the text to segment; {@code null} yields an empty list
     * @return the term list after {@code CoreStopWordDictionary.apply} has
     *         been applied to it
     */
    public static List<Term> splitWords(String words){
        if (words == null) {
            return new ArrayList<>();
        }
        List<Term> wordList = HanLP.segment(words);
        CoreStopWordDictionary.apply(wordList);
        return wordList;
    }

    /**
     * Extracts the word text of every term and drops each word that appears
     * in {@link #stopWords}.
     *
     * @param wordList the terms to filter; {@code null} yields an empty list
     * @return a new list holding the remaining words in their original order
     */
    public static List<String> removeStop(List<Term> wordList){
        List<String> wordStringList = new ArrayList<>();
        if (wordList == null) {
            return wordStringList;
        }
        for (Term term : wordList) {
            wordStringList.add(term.word);
        }
        wordStringList.removeAll(stopWords);
        return wordStringList;
    }

    // Manual check, kept as a usage example:
    //     String words = "床前明月光，疑是地上霜。举头望明月，低头思故乡。王少昆，河北";
    //     System.out.println(removeStop(splitWords(words)));
    // "王少昆" was added to the HIT stop-word table only for this check and is
    // not part of the original list; remember to remove it after testing.
    // Custom dictionaries live under
    // src/main/resources/data-for-1.7.5/data/dictionary/custom
    // hit_stopwords.txt is the HIT (Harbin Institute of Technology) stop-word table.
}
